{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# <div style='color:#6f67e0;font-weight:800;'>隔多少步保存模型</div>\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "from nes_py.wrappers import JoypadSpace\n",
    "import gym_super_mario_bros\n",
    "from gym_super_mario_bros.actions import SIMPLE_MOVEMENT\n",
    "import time\n",
    "from matplotlib import pyplot as plt\n",
    "from gym.wrappers import GrayScaleObservation\n",
    "from stable_baselines3.common.monitor import Monitor\n",
    "from stable_baselines3.common.vec_env import DummyVecEnv\n",
    "from stable_baselines3.common.vec_env import VecFrameStack\n",
    "import os\n",
    "from stable_baselines3 import PPO\n",
    "\n",
    "from stable_baselines3.common.results_plotter import load_results, ts2xy\n",
    "import numpy as np\n",
    "from stable_baselines3.common.callbacks import BaseCallback\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the Super Mario Bros environment and wrap it with the SIMPLE_MOVEMENT action list.\n",
    "env = JoypadSpace(gym_super_mario_bros.make('SuperMarioBros-v0'), SIMPLE_MOVEMENT)\n",
    "\n",
    "# The Monitor wrapper is pointed at this folder, so create it up front.\n",
    "monitor_dir = r'./monitor_log/'\n",
    "os.makedirs(monitor_dir, exist_ok=True)\n",
    "env = Monitor(env, filename=monitor_dir)\n",
    "\n",
    "# Grayscale observations (channel axis kept), then a single-env vector wrapper\n",
    "# with the last 4 frames stacked channel-last.\n",
    "env = GrayScaleObservation(env, keep_dim=True)\n",
    "env = VecFrameStack(DummyVecEnv([lambda: env]), n_stack=4, channels_order='last')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using cuda device\n",
      "Wrapping the env in a VecTransposeImage.\n"
     ]
    }
   ],
   "source": [
    "# Training configuration: TensorBoard output folder and the PPO settings passed below.\n",
    "tensorboard_log = r'./tensorboard_log/'\n",
    "learning_rate = 1e-6\n",
    "n_steps = 128\n",
    "\n",
    "# PPO agent with a CNN policy, bound to the `env` built in the previous cell.\n",
    "model = PPO(\n",
    "    policy=\"CnnPolicy\",\n",
    "    env=env,\n",
    "    learning_rate=learning_rate,\n",
    "    n_steps=n_steps,\n",
    "    tensorboard_log=tensorboard_log,\n",
    "    verbose=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SaveOnBestTrainingRewardCallback(BaseCallback):\n",
    "    \"\"\"\n",
    "    Callback that saves a snapshot of the model every ``check_freq`` calls.\n",
    "\n",
    "    Despite its name, no reward is inspected: each time ``n_calls`` is a\n",
    "    multiple of ``check_freq`` the current model is saved under\n",
    "    ``{save_model_dir}/best_model/model_{n_calls}``.\n",
    "\n",
    "    :param check_freq: (int) Number of callback calls between two saves;\n",
    "        must be non-zero.\n",
    "    :param save_model_dir: (str) Folder under which the ``best_model/``\n",
    "        sub-folder holding the snapshots is created.\n",
    "    :param verbose: (int) When greater than 0, print the call count at\n",
    "        every save.\n",
    "    :raises ValueError: if ``check_freq`` is 0.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, check_freq: int, save_model_dir: str, verbose: int = 1):\n",
    "        super(SaveOnBestTrainingRewardCallback, self).__init__(verbose)\n",
    "        # Fail at construction instead of with a ZeroDivisionError on the first step.\n",
    "        if check_freq == 0:\n",
    "            raise ValueError('check_freq must be a non-zero integer')\n",
    "        self.check_freq = check_freq\n",
    "        self.save_path = os.path.join(save_model_dir, 'best_model/')\n",
    "        # Not used by this callback; kept so code that reads the attribute keeps working.\n",
    "        self.best_mean_reward = -np.inf\n",
    "\n",
    "    def _init_callback(self) -> None:\n",
    "        # Create the snapshot folder before the first save.\n",
    "        os.makedirs(self.save_path, exist_ok=True)\n",
    "\n",
    "    def _on_step(self) -> bool:\n",
    "        if self.n_calls % self.check_freq == 0:\n",
    "            # Honour the verbosity level instead of printing unconditionally.\n",
    "            if self.verbose > 0:\n",
    "                print('self.n_calls: ', self.n_calls)\n",
    "            snapshot_path = os.path.join(self.save_path, 'model_{}'.format(self.n_calls))\n",
    "            self.model.save(snapshot_path)\n",
    "\n",
    "        # Always True: this callback never requests that training stop.\n",
    "        return True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Logging to ./tensorboard_log/PPO_1\n",
      "----------------------------\n",
      "| time/              |     |\n",
      "|    fps             | 12  |\n",
      "|    iterations      | 1   |\n",
      "|    time_elapsed    | 10  |\n",
      "|    total_timesteps | 128 |\n",
      "----------------------------\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\software\\e_anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 19            |\n",
      "|    iterations           | 2             |\n",
      "|    time_elapsed         | 12            |\n",
      "|    total_timesteps      | 256           |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 3.3341348e-07 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | -0.00407      |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 202           |\n",
      "|    n_updates            | 10            |\n",
      "|    policy_gradient_loss | -3.95e-06     |\n",
      "|    value_loss           | 432           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 25            |\n",
      "|    iterations           | 3             |\n",
      "|    time_elapsed         | 15            |\n",
      "|    total_timesteps      | 384           |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 4.9639493e-07 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | -0.0112       |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 411           |\n",
      "|    n_updates            | 20            |\n",
      "|    policy_gradient_loss | -2.77e-05     |\n",
      "|    value_loss           | 891           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 28            |\n",
      "|    iterations           | 4             |\n",
      "|    time_elapsed         | 17            |\n",
      "|    total_timesteps      | 512           |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 5.3690746e-07 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | 0.00237       |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 158           |\n",
      "|    n_updates            | 30            |\n",
      "|    policy_gradient_loss | -6.49e-06     |\n",
      "|    value_loss           | 284           |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 31            |\n",
      "|    iterations           | 5             |\n",
      "|    time_elapsed         | 20            |\n",
      "|    total_timesteps      | 640           |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 7.6182187e-07 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | -0.00424      |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.717         |\n",
      "|    n_updates            | 40            |\n",
      "|    policy_gradient_loss | -0.000109     |\n",
      "|    value_loss           | 2.27          |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 34            |\n",
      "|    iterations           | 6             |\n",
      "|    time_elapsed         | 22            |\n",
      "|    total_timesteps      | 768           |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 2.8405339e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | -0.0687       |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.426         |\n",
      "|    n_updates            | 50            |\n",
      "|    policy_gradient_loss | -0.000199     |\n",
      "|    value_loss           | 1.63          |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 36            |\n",
      "|    iterations           | 7             |\n",
      "|    time_elapsed         | 24            |\n",
      "|    total_timesteps      | 896           |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 3.5045668e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | -0.0256       |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.237         |\n",
      "|    n_updates            | 60            |\n",
      "|    policy_gradient_loss | -0.000269     |\n",
      "|    value_loss           | 0.787         |\n",
      "-------------------------------------------\n",
      "self.n_calls:  1000\n",
      "------------------------------------------\n",
      "| time/                   |              |\n",
      "|    fps                  | 36           |\n",
      "|    iterations           | 8            |\n",
      "|    time_elapsed         | 27           |\n",
      "|    total_timesteps      | 1024         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 1.071021e-06 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.95        |\n",
      "|    explained_variance   | -0.0211      |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.189        |\n",
      "|    n_updates            | 70           |\n",
      "|    policy_gradient_loss | 4.82e-05     |\n",
      "|    value_loss           | 0.654        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 38            |\n",
      "|    iterations           | 9             |\n",
      "|    time_elapsed         | 30            |\n",
      "|    total_timesteps      | 1152          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 2.1508895e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | 0.0266        |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.153         |\n",
      "|    n_updates            | 80            |\n",
      "|    policy_gradient_loss | -0.000268     |\n",
      "|    value_loss           | 0.439         |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| time/                   |              |\n",
      "|    fps                  | 39           |\n",
      "|    iterations           | 10           |\n",
      "|    time_elapsed         | 32           |\n",
      "|    total_timesteps      | 1280         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 7.515773e-07 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.95        |\n",
      "|    explained_variance   | 0.0196       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0826       |\n",
      "|    n_updates            | 90           |\n",
      "|    policy_gradient_loss | -5.69e-05    |\n",
      "|    value_loss           | 0.312        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 40            |\n",
      "|    iterations           | 11            |\n",
      "|    time_elapsed         | 34            |\n",
      "|    total_timesteps      | 1408          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 1.4523976e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | 0.0647        |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.192         |\n",
      "|    n_updates            | 100           |\n",
      "|    policy_gradient_loss | -0.000139     |\n",
      "|    value_loss           | 0.418         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 41            |\n",
      "|    iterations           | 12            |\n",
      "|    time_elapsed         | 37            |\n",
      "|    total_timesteps      | 1536          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 2.1690503e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | 0.00582       |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.189         |\n",
      "|    n_updates            | 110           |\n",
      "|    policy_gradient_loss | -0.000208     |\n",
      "|    value_loss           | 0.467         |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| time/                   |              |\n",
      "|    fps                  | 42           |\n",
      "|    iterations           | 13           |\n",
      "|    time_elapsed         | 39           |\n",
      "|    total_timesteps      | 1664         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 1.124572e-06 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.95        |\n",
      "|    explained_variance   | 0.0139       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0844       |\n",
      "|    n_updates            | 120          |\n",
      "|    policy_gradient_loss | -5.87e-05    |\n",
      "|    value_loss           | 0.204        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| time/                   |              |\n",
      "|    fps                  | 42           |\n",
      "|    iterations           | 14           |\n",
      "|    time_elapsed         | 41           |\n",
      "|    total_timesteps      | 1792         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 9.592623e-07 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.95        |\n",
      "|    explained_variance   | -0.00246     |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0956       |\n",
      "|    n_updates            | 130          |\n",
      "|    policy_gradient_loss | -4.08e-05    |\n",
      "|    value_loss           | 0.256        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 43            |\n",
      "|    iterations           | 15            |\n",
      "|    time_elapsed         | 44            |\n",
      "|    total_timesteps      | 1920          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 1.4905818e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | 0.0415        |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.069         |\n",
      "|    n_updates            | 140           |\n",
      "|    policy_gradient_loss | -4.83e-05     |\n",
      "|    value_loss           | 0.188         |\n",
      "-------------------------------------------\n",
      "self.n_calls:  2000\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 43            |\n",
      "|    iterations           | 16            |\n",
      "|    time_elapsed         | 47            |\n",
      "|    total_timesteps      | 2048          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 8.3167106e-07 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | 0.0165        |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.0537        |\n",
      "|    n_updates            | 150           |\n",
      "|    policy_gradient_loss | -5.71e-05     |\n",
      "|    value_loss           | 0.153         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 43            |\n",
      "|    iterations           | 17            |\n",
      "|    time_elapsed         | 49            |\n",
      "|    total_timesteps      | 2176          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 1.0207295e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | -0.00134      |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.0462        |\n",
      "|    n_updates            | 160           |\n",
      "|    policy_gradient_loss | -6.23e-05     |\n",
      "|    value_loss           | 0.128         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 44            |\n",
      "|    iterations           | 18            |\n",
      "|    time_elapsed         | 51            |\n",
      "|    total_timesteps      | 2304          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 1.0975637e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | -0.00196      |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.062         |\n",
      "|    n_updates            | 170           |\n",
      "|    policy_gradient_loss | -2.45e-05     |\n",
      "|    value_loss           | 0.154         |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| time/                   |              |\n",
      "|    fps                  | 44           |\n",
      "|    iterations           | 19           |\n",
      "|    time_elapsed         | 54           |\n",
      "|    total_timesteps      | 2432         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 6.556511e-07 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.95        |\n",
      "|    explained_variance   | 0.00282      |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0768       |\n",
      "|    n_updates            | 180          |\n",
      "|    policy_gradient_loss | -4.89e-05    |\n",
      "|    value_loss           | 0.149        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 44            |\n",
      "|    iterations           | 20            |\n",
      "|    time_elapsed         | 56            |\n",
      "|    total_timesteps      | 2560          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 2.9620714e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | 0.0113        |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.0566        |\n",
      "|    n_updates            | 190           |\n",
      "|    policy_gradient_loss | -0.000193     |\n",
      "|    value_loss           | 0.105         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 45            |\n",
      "|    iterations           | 21            |\n",
      "|    time_elapsed         | 59            |\n",
      "|    total_timesteps      | 2688          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 9.0990216e-07 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | -0.00335      |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.183         |\n",
      "|    n_updates            | 200           |\n",
      "|    policy_gradient_loss | 2.12e-05      |\n",
      "|    value_loss           | 0.275         |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| time/                   |              |\n",
      "|    fps                  | 45           |\n",
      "|    iterations           | 22           |\n",
      "|    time_elapsed         | 61           |\n",
      "|    total_timesteps      | 2816         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 2.914574e-06 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.95        |\n",
      "|    explained_variance   | 0.0407       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.04         |\n",
      "|    n_updates            | 210          |\n",
      "|    policy_gradient_loss | -0.000226    |\n",
      "|    value_loss           | 0.0975       |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| time/                   |              |\n",
      "|    fps                  | 45           |\n",
      "|    iterations           | 23           |\n",
      "|    time_elapsed         | 64           |\n",
      "|    total_timesteps      | 2944         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 4.479196e-06 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.95        |\n",
      "|    explained_variance   | 0.00222      |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0742       |\n",
      "|    n_updates            | 220          |\n",
      "|    policy_gradient_loss | -0.000235    |\n",
      "|    value_loss           | 0.147        |\n",
      "------------------------------------------\n",
      "self.n_calls:  3000\n",
      "------------------------------------------\n",
      "| time/                   |              |\n",
      "|    fps                  | 45           |\n",
      "|    iterations           | 24           |\n",
      "|    time_elapsed         | 67           |\n",
      "|    total_timesteps      | 3072         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 1.209788e-06 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.95        |\n",
      "|    explained_variance   | 0.0051       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.339        |\n",
      "|    n_updates            | 230          |\n",
      "|    policy_gradient_loss | -2.57e-06    |\n",
      "|    value_loss           | 0.686        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 45            |\n",
      "|    iterations           | 25            |\n",
      "|    time_elapsed         | 70            |\n",
      "|    total_timesteps      | 3200          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 1.8835999e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | 0.0395        |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.0469        |\n",
      "|    n_updates            | 240           |\n",
      "|    policy_gradient_loss | -0.000126     |\n",
      "|    value_loss           | 0.0871        |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 45            |\n",
      "|    iterations           | 26            |\n",
      "|    time_elapsed         | 72            |\n",
      "|    total_timesteps      | 3328          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 1.6959384e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | -0.00157      |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.107         |\n",
      "|    n_updates            | 250           |\n",
      "|    policy_gradient_loss | -8.86e-05     |\n",
      "|    value_loss           | 0.218         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 45            |\n",
      "|    iterations           | 27            |\n",
      "|    time_elapsed         | 75            |\n",
      "|    total_timesteps      | 3456          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 5.3844415e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | 0.00734       |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.147         |\n",
      "|    n_updates            | 260           |\n",
      "|    policy_gradient_loss | -0.000455     |\n",
      "|    value_loss           | 0.292         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 45            |\n",
      "|    iterations           | 28            |\n",
      "|    time_elapsed         | 78            |\n",
      "|    total_timesteps      | 3584          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 4.0642917e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | -0.0178       |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.0697        |\n",
      "|    n_updates            | 270           |\n",
      "|    policy_gradient_loss | -0.000179     |\n",
      "|    value_loss           | 0.176         |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| time/                   |              |\n",
      "|    fps                  | 45           |\n",
      "|    iterations           | 29           |\n",
      "|    time_elapsed         | 81           |\n",
      "|    total_timesteps      | 3712         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 4.282687e-06 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.95        |\n",
      "|    explained_variance   | 0.00802      |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.0604       |\n",
      "|    n_updates            | 280          |\n",
      "|    policy_gradient_loss | -0.000278    |\n",
      "|    value_loss           | 0.145        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 45            |\n",
      "|    iterations           | 30            |\n",
      "|    time_elapsed         | 84            |\n",
      "|    total_timesteps      | 3840          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 1.7480925e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | -0.00868      |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.0692        |\n",
      "|    n_updates            | 290           |\n",
      "|    policy_gradient_loss | -0.0001       |\n",
      "|    value_loss           | 0.154         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 45            |\n",
      "|    iterations           | 31            |\n",
      "|    time_elapsed         | 86            |\n",
      "|    total_timesteps      | 3968          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 1.2973323e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | -0.0155       |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.055         |\n",
      "|    n_updates            | 300           |\n",
      "|    policy_gradient_loss | -6.73e-05     |\n",
      "|    value_loss           | 0.136         |\n",
      "-------------------------------------------\n",
      "self.n_calls:  4000\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 45            |\n",
      "|    iterations           | 32            |\n",
      "|    time_elapsed         | 90            |\n",
      "|    total_timesteps      | 4096          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 3.9474107e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | -0.0161       |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.059         |\n",
      "|    n_updates            | 310           |\n",
      "|    policy_gradient_loss | -0.000223     |\n",
      "|    value_loss           | 0.127         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 45            |\n",
      "|    iterations           | 33            |\n",
      "|    time_elapsed         | 92            |\n",
      "|    total_timesteps      | 4224          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 1.3997778e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | 0.0278        |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.061         |\n",
      "|    n_updates            | 320           |\n",
      "|    policy_gradient_loss | -9.21e-05     |\n",
      "|    value_loss           | 0.153         |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 45            |\n",
      "|    iterations           | 34            |\n",
      "|    time_elapsed         | 95            |\n",
      "|    total_timesteps      | 4352          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 2.3352914e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | 0.00844       |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.146         |\n",
      "|    n_updates            | 330           |\n",
      "|    policy_gradient_loss | -0.000228     |\n",
      "|    value_loss           | 0.28          |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 45            |\n",
      "|    iterations           | 35            |\n",
      "|    time_elapsed         | 97            |\n",
      "|    total_timesteps      | 4480          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 1.1175871e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | 0.0104        |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.175         |\n",
      "|    n_updates            | 340           |\n",
      "|    policy_gradient_loss | -8.84e-05     |\n",
      "|    value_loss           | 0.303         |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| time/                   |              |\n",
      "|    fps                  | 45           |\n",
      "|    iterations           | 36           |\n",
      "|    time_elapsed         | 100          |\n",
      "|    total_timesteps      | 4608         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 1.072418e-06 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.95        |\n",
      "|    explained_variance   | 0.02         |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.136        |\n",
      "|    n_updates            | 350          |\n",
      "|    policy_gradient_loss | -9.65e-05    |\n",
      "|    value_loss           | 0.399        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| time/                   |              |\n",
      "|    fps                  | 46           |\n",
      "|    iterations           | 37           |\n",
      "|    time_elapsed         | 102          |\n",
      "|    total_timesteps      | 4736         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 2.375804e-06 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.95        |\n",
      "|    explained_variance   | 0.0217       |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.187        |\n",
      "|    n_updates            | 360          |\n",
      "|    policy_gradient_loss | -0.000168    |\n",
      "|    value_loss           | 0.347        |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 46            |\n",
      "|    iterations           | 38            |\n",
      "|    time_elapsed         | 105           |\n",
      "|    total_timesteps      | 4864          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 3.5562553e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | -0.0161       |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.068         |\n",
      "|    n_updates            | 370           |\n",
      "|    policy_gradient_loss | -0.000303     |\n",
      "|    value_loss           | 0.21          |\n",
      "-------------------------------------------\n",
      "-------------------------------------------\n",
      "| time/                   |               |\n",
      "|    fps                  | 46            |\n",
      "|    iterations           | 39            |\n",
      "|    time_elapsed         | 107           |\n",
      "|    total_timesteps      | 4992          |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 2.0810403e-06 |\n",
      "|    clip_fraction        | 0             |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.95         |\n",
      "|    explained_variance   | 0.00568       |\n",
      "|    learning_rate        | 1e-06         |\n",
      "|    loss                 | 0.146         |\n",
      "|    n_updates            | 380           |\n",
      "|    policy_gradient_loss | -0.000185     |\n",
      "|    value_loss           | 0.256         |\n",
      "-------------------------------------------\n",
      "self.n_calls:  5000\n",
      "------------------------------------------\n",
      "| time/                   |              |\n",
      "|    fps                  | 45           |\n",
      "|    iterations           | 40           |\n",
      "|    time_elapsed         | 111          |\n",
      "|    total_timesteps      | 5120         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 2.910383e-06 |\n",
      "|    clip_fraction        | 0            |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.95        |\n",
      "|    explained_variance   | -0.0232      |\n",
      "|    learning_rate        | 1e-06        |\n",
      "|    loss                 | 0.275        |\n",
      "|    n_updates            | 390          |\n",
      "|    policy_gradient_loss | -0.000248    |\n",
      "|    value_loss           | 0.653        |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<stable_baselines3.ppo.ppo.PPO at 0x1da4abad5e0>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep checkpoints in a directory relative to the notebook (like monitor_log and\n",
    "# tensorboard_log) instead of a machine-specific absolute drive path.\n",
    "save_model_dir = r'./saved_models/'\n",
    "os.makedirs(save_model_dir, exist_ok=True)\n",
    "\n",
    "# The first argument (1000) is presumably the check/save interval in env steps:\n",
    "# the training log prints 'self.n_calls' at multiples of 1000.\n",
    "# NOTE(review): confirm against the SaveOnBestTrainingRewardCallback definition.\n",
    "callback1 = SaveOnBestTrainingRewardCallback(1000, save_model_dir)\n",
    "\n",
    "# Short demonstration run; the callback is invoked by PPO during training.\n",
    "model.learn(total_timesteps=5000,callback=callback1)\n",
    "# model.save(\"mario_model\")"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "9465aae7e0ab1403d672807d1a0963d86dbda2f584fbe3054c36cf78311c6c77"
  },
  "kernelspec": {
   "display_name": "Python 3.8.11 ('pytorch')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.11"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
